# -*- coding: utf-8 -*-
"""
Created on Tue Apr 22 16:12:54 2025

@author: bramv
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
from sklearn.metrics import mean_absolute_error, mean_squared_error

#%% 1. LOAD DATA FROM EXCEL
# Historical PEM CAPEX: keep the 'Year' and 'PEM' columns, rename the cost
# column to 'Cost_PEM', coerce every cell to a number and drop rows that are
# empty or non-numeric.
cost_file = 'CAPEX DATA.xlsb'
cost_sheet = 'CAPEX'
df_cost = (
    pd.read_excel(cost_file, sheet_name=cost_sheet, engine='pyxlsb')
    .loc[:, ['Year', 'PEM']]
    .dropna()
    .rename(columns={'PEM': 'Cost_PEM'})
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
)

# Deployment forecast: same cleaning, applied to 'Year' and 'Deployment_PEM'.
forecast_file = "DEPLOYMENT FORECAST PEM.xlsx"
df_forecast = (
    pd.read_excel(forecast_file)
    .loc[:, ['Year', 'Deployment_PEM']]
    .dropna()
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
)

# Merge and sort: only years present in both tables survive the join
# (pandas' default inner merge), ordered chronologically with a fresh index.
df_merged = (
    df_cost.merge(df_forecast, on='Year')
    .sort_values('Year')
    .reset_index(drop=True)
)
#%% 2. RUN WRIGHT'S LAW REGRESSION (3.2.1. Wright's Law Model)
def run_wrights_law(cost, capacity):
    """Fit Wright's law, log(cost) = a + b * log(capacity), by least squares.

    Parameters
    ----------
    cost : array-like
        Observed unit costs; natural logs are taken, so values should be
        strictly positive.
    capacity : array-like
        Cumulative deployment matching ``cost`` element by element; also
        logged, so values should be strictly positive.

    Returns
    -------
    tuple
        ``(b, std_err, noise_std, p_value, r_squared)`` where ``b`` is the
        fitted slope (experience exponent), ``std_err`` its standard error,
        ``noise_std`` the sample standard deviation (``ddof=1``) of the
        log-space residuals, ``p_value`` the p-value reported by
        ``linregress`` for the slope, and ``r_squared`` the squared
        correlation coefficient.
    """
    log_y = np.log(cost)
    log_x = np.log(capacity)

    fit = linregress(log_x, log_y)

    # Residuals of the fitted line in log space
    fitted = fit.intercept + fit.slope * log_x
    noise_std = np.std(log_y - fitted, ddof=1)

    return fit.slope, fit.stderr, noise_std, fit.pvalue, fit.rvalue**2

# Fit the experience curve on the merged historical PEM cost/deployment data
b_pem, err_pem, sigma_pem, p_pem, r2_pem = run_wrights_law(
    df_merged['Cost_PEM'],
    df_merged['Deployment_PEM'],
)

# 3.2.2. Statistical Evaluation and Hypothesis Testing
# Each entry: (printed label, fitted statistic, number of decimals shown)
for label, value, decimals in (
    ("Experience exponent (b):", b_pem, 5),
    ("Standard error of b:", err_pem, 4),
    ("Noise standard deviation (σ):", sigma_pem, 4),
    ("p-value:", p_pem, 4),
    ("R²:", r2_pem, 4),
):
    print(label, round(value, decimals))


#%% 3. DETERMINISTIC FORECAST

# Known anchor point: the latest year for which both deployment and cost are
# observed. It closes the historical series and is where the forecast starts.
year_anchor = 2023
anchor_rows = df_merged[df_merged['Year'] == year_anchor]
if anchor_rows.empty:
    # Fail with a readable message instead of an IndexError from `.values[0]`
    raise ValueError(
        f"Anchor year {year_anchor} not found in merged cost/deployment data."
    )
cost_anchor = anchor_rows['Cost_PEM'].values[0]
deploy_anchor = anchor_rows['Deployment_PEM'].values[0]
# Re-anchored intercept: chosen so the curve passes exactly through the
# anchor observation while keeping the fitted slope b_pem.
a_pem = np.log(cost_anchor) - b_pem * np.log(deploy_anchor)

forecast_start_year = 2024
# Select forecast rows by year value (as the stochastic section does) rather
# than by `forecast_start_year - first_year` offset arithmetic, which only
# works when the table holds contiguous integer years starting on or before
# the forecast start year.
forecast_years_all = df_forecast['Year'].values
is_forecast_year = forecast_years_all >= forecast_start_year
# Position of the first forecast row; retained as a module-level name.
start_index = (
    int(np.argmax(is_forecast_year))
    if is_forecast_year.any()
    else len(forecast_years_all)
)
forecast_years_sub = forecast_years_all[is_forecast_year]
forecast_deploy_sub = df_forecast['Deployment_PEM'].values[is_forecast_year]

# Deterministic forecast for context: cost = exp(a + b * log(deployment))
forecast_cost_sub = np.exp(a_pem + b_pem * np.log(forecast_deploy_sub))
# Prepend the anchor so the plotted forecast line starts at the last
# observed point.
years_pem_plot = np.insert(forecast_years_sub, 0, year_anchor)
costs_pem_deterministic = np.insert(forecast_cost_sub, 0, cost_anchor)

# Historical costs and the deterministic projection on a logarithmic cost axis
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    df_merged['Year'], df_merged['Cost_PEM'],
    'o-', color='tab:orange', label='PEM Actual Cost',
)
ax.plot(
    years_pem_plot, costs_pem_deterministic,
    'o--', color='darkorange', label='Deterministic Forecast',
)

# Log scale with fixed decade ticks and limits
ax.set_yscale('log')
ax.set_yticks([100, 1000, 10000])
ax.set_ylim(100, 10000)

ax.set_title("PEM Electrolyzer CAPEX Forecast Using Wright's Law")
ax.set_xlabel("Year")
ax.set_ylabel("CAPEX (EUR/kW, log scale)")

ax.grid(True, which='both', linestyle='--', alpha=0.6)
ax.legend()
fig.tight_layout()
plt.show()

# Report the deterministic projection for the milestone years 2030 and 2050
for target_year in (2030, 2050):
    hits = np.flatnonzero(forecast_years_sub == target_year)
    if hits.size == 0:
        print(f"{target_year} not found in forecast years.")
        continue
    idx = hits[0]  # first matching position
    forecast_value = forecast_cost_sub[idx]
    print(f"{target_year} deterministic forecast: {forecast_value:.2f} EUR/kW")


#%% 4. STOCHASTIC SIMULATION (3.2.3. Stochastic Forecasting with Shock Propagation)
def simulate_wrights_law(log_cost_0, log_deploy_diff, b, sigma, n_sim=1000, rng=None):
    """Simulate stochastic Wright's-law cost paths with accumulating shocks.

    Each path starts at ``log_cost_0`` and, at every step ``t``, adds
    ``b * log_deploy_diff[t]`` plus an independent ``N(0, sigma)`` shock, so
    shocks propagate to all later steps of the same path.

    Parameters
    ----------
    log_cost_0 : float
        Log cost at the anchor point; it is read but never modified.
    log_deploy_diff : array-like
        Per-step increments of log cumulative deployment.
    b : float
        Experience exponent multiplying each deployment increment.
    sigma : float
        Standard deviation of the per-step log-cost shock.
    n_sim : int, optional
        Number of simulated paths (default 1000).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of the normal draws. When omitted, the global ``np.random``
        state is used, so ``np.random.seed`` still controls the result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_sim, len(log_deploy_diff))`` holding simulated
        costs (not logs); column ``t`` is the cost after step ``t``.
    """
    steps = np.asarray(log_deploy_diff, dtype=float)
    n_steps = len(steps)

    draw = np.random.normal if rng is None else rng.normal
    # One shock per (path, step), drawn in a single call instead of a
    # Python-level double loop.
    shocks = draw(0, sigma, size=(n_sim, n_steps))

    increments = b * steps + shocks
    if n_steps:
        # Fold the starting level into the first step so that the running
        # sum reproduces log_cost_0 + sum of increments up to each step.
        increments[:, 0] += log_cost_0

    return np.exp(np.cumsum(increments, axis=1))

# Starting state shared by every simulated path: the anchor observation in logs
log_cost_0 = np.log(cost_anchor)
log_deploy_0 = np.log(deploy_anchor)

# Forecast horizon: filter the deployment table once, reusing
# forecast_start_year instead of repeating the literal year.
forecast_rows = df_forecast[df_forecast['Year'] >= forecast_start_year]
deploy_forecast = forecast_rows['Deployment_PEM'].values
years_forecast = forecast_rows['Year'].values

log_deploy_forecast = np.log(deploy_forecast)
# Step-by-step change in log deployment; the first step is measured from
# the anchor-year deployment.
log_deploy_diff = np.diff(np.concatenate(([log_deploy_0], log_deploy_forecast)))

cost_paths = simulate_wrights_law(log_cost_0, log_deploy_diff, b_pem, sigma_pem, n_sim=1000)

# Summary statistics across simulations for each forecast year.
# NOTE: cost_p05 / cost_p95 hold the 2.5th and 97.5th percentiles (bounds of
# the 95% interval); the names are kept because later cells use them.
cost_median = np.median(cost_paths, axis=0)
cost_p05, cost_p25, cost_p75, cost_p95 = np.percentile(
    cost_paths, [2.5, 25, 75, 97.5], axis=0
)

# Add the anchor year to the front of the median forecast for plotting
costs_pem_plot = np.insert(cost_median, 0, cost_anchor)
years_pem_plot = np.insert(years_forecast, 0, year_anchor)

#%% 5. PLOT STOCHASTIC RESULTS (4.2 Costs Projections AEC and PEM)
plt.figure(figsize=(10, 6))

# Plot actual PEM cost
plt.plot(df_merged['Year'], df_merged['Cost_PEM'], 'o-', color='tab:orange', label='PEM Actual Cost')

# Split the median series at the anchor year: entries up to and including
# the anchor are drawn with markers, later entries as a dashed line.
split_index = np.where(np.array(years_pem_plot) > year_anchor)[0][0]
years_actual_median = years_pem_plot[:split_index]
costs_actual_median = costs_pem_plot[:split_index]
years_forecast_median = years_pem_plot[split_index:]
costs_forecast_median = costs_pem_plot[split_index:]

# Plot forecast median:
#  - up to the anchor year: with dots (this series carries the
#    'Stochastic Forecast' legend entry)
plt.plot(years_actual_median, costs_actual_median, 'o-', color='tab:orange', label='Stochastic Forecast')
#  - after the anchor year: dashed line only
plt.plot(years_forecast_median, costs_forecast_median, '--', color='tab:orange', linewidth= 3)

# Plot prediction intervals (cost_p05 / cost_p95 are the 2.5th and 97.5th
# percentiles, i.e. the bounds of the 95% band)
plt.fill_between(years_forecast, cost_p25, cost_p75, color='tab:orange', alpha=0.25, label='50% Prediction Interval')
plt.fill_between(years_forecast, cost_p05, cost_p95, color='tab:orange', alpha=0.1, label='95% Prediction Interval')

# Dashed connector bridging the last anchor-side point and the first
# forecast point, so the median line has no gap
plt.plot(
    [years_actual_median[-1], years_forecast_median[0]],
    [costs_actual_median[-1], costs_forecast_median[0]],
    '--', color='tab:orange', linewidth=2.5
)

# Log y-axis and ticks
plt.yscale('log')
plt.yticks([10, 100, 1000, 10000, 100000], fontsize=16)
plt.xticks(fontsize=16)
plt.ylim(10, 100000)

# Labels and title
plt.title("Stochastic Wright’s Law Forecast for PEM Electrolyzer CAPEX (2024–2050)", fontsize=16)
plt.xlabel("Year", fontsize=16)
plt.ylabel("CAPEX (EUR/kW, log scale)", fontsize=16)

# Grid and legend
plt.grid(True, which='both', linestyle='--', alpha=0.6)
plt.legend(loc='upper left', fontsize=14)

# Final layout
plt.tight_layout()
plt.show()



#%% 6. EXPORT FORECAST TABLE TO EXCEL

# Years reported in the table: 2024 to 2050 inclusive
years_range = np.arange(2024, 2051)

# Every reported year must exist in the forecast; name the gaps explicitly
# instead of failing with an IndexError during the position lookup.
missing_years = years_range[~np.isin(years_range, years_forecast)]
if missing_years.size:
    raise ValueError(
        f"Deployment forecast has no data for year(s): {missing_years.tolist()}"
    )
# Position of each reported year within the forecast arrays (first match)
idx_range = [np.flatnonzero(years_forecast == y)[0] for y in years_range]

# Extract data for table
# NOTE(review): deployment values are exported exactly as stored in the
# forecast file; no unit conversion happens here even though the row label
# says GW. Confirm the input file is already in GW.
cumul_prod = deploy_forecast[idx_range]
point_forecast = cost_median[idx_range]
ci_95_lower = cost_p05[idx_range]   # 2.5th percentile
ci_95_upper = cost_p95[idx_range]   # 97.5th percentile
ci_50_lower = cost_p25[idx_range]
ci_50_upper = cost_p75[idx_range]

# Create DataFrame: one row per quantity, one column per year
# NOTE(review): the row labels say $(2020)/kW while the plots and printed
# results in this script use EUR/kW. Confirm which currency label is correct.
forecast_table = pd.DataFrame(
    data=[
        cumul_prod,
        point_forecast,
        ci_95_lower,
        ci_95_upper,
        ci_50_lower,
        ci_50_upper
    ],
    index=[
        'cumul_prod, GW',
        'point_forecast, $(2020)/kW',
        'forecast 95% C.I. lower bound, $(2020)/kW',
        'forecast 95% C.I. upper bound, $(2020)/kW',
        'forecast 50% C.I. lower bound, $(2020)/kW',
        'forecast 50% C.I. upper bound, $(2020)/kW'
    ],
    columns=years_range
)

# Export to Excel
output_path = "PEM_Cost_Forecast_Table.xlsx"
forecast_table.to_excel(output_path)
print(f"Forecast table saved to:\n{output_path}")

#%% 7. PRINT STOCHASTIC COSTS FOR 2030 AND 2050
for target_year in (2030, 2050):
    hits = np.flatnonzero(years_forecast == target_year)
    if hits.size == 0:
        print(f"{target_year} not found in forecast years.")
        continue

    idx = hits[0]  # first matching position in the forecast arrays
    median = cost_median[idx]
    lower_50, upper_50 = cost_p25[idx], cost_p75[idx]
    # cost_p05 / cost_p95 store the 2.5th and 97.5th percentiles
    lower_95, upper_95 = cost_p05[idx], cost_p95[idx]

    print('PEM')
    print(f"{target_year}:")
    print(f"  Median forecast: {median:.2f} EUR/kW")
    print(f"  50% prediction interval: {lower_50:.2f}–{upper_50:.2f} EUR/kW")
    print(f"  95% prediction interval: {lower_95:.2f}–{upper_95:.2f} EUR/kW\n")





